
# Download tract-level ACS estimates of variable B25119_002 for one state and
# one survey year via get_acs().
#   y  - passed to get_acs() as `year`
#   st - passed to get_acs() as `state`
# Returns the get_acs() result reduced to two columns: GEOID and med_hhi
# (the `estimate` column, renamed).
# NOTE(review): B25119 looks like the "median household income by tenure"
# table, with _002 being the owner-occupied series rather than the
# all-household figure -- confirm this is the intended variable.
get_med_hhi = function(y, st){
  # set CENSUS_API_KEY per tidycensus instructions if necessary
  acs = get_acs(
    geography = 'tract',
    variables = 'B25119_002',
    year = y,
    state = st
  )
  select(acs, GEOID, med_hhi = estimate)
}

# Fetch tract-level med_hhi for DC plus every entry of state.abb for survey
# year y.
# Returns a list with one data frame per state (not a single combined frame);
# each frame carries a `year` column set to y.
get_year = function(y){
  states = c('DC', state.abb)
  fetch_state = function(x) {
    mutate(bind_rows(get_med_hhi(y, x)), year = y)
  }
  lapply(states, fetch_state)
}

# Pull the ACS tract incomes for the three survey years and stack them into a
# single frame. get_year() returns a list of per-state frames, so lapply()
# produces a list of lists; bind_rows() is relied on to flatten it.
tr = bind_rows(lapply(c(2012, 2016, 2020), get_year))

# Summarise data/final_long by tract (GEOID) and cycle, exposing cycle as
# `year`: row count plus median and mean of `total`. Only rows with a non-missing
# GEOID, a cycle of 2012/2016/2020, and in_l2 == 1 and in_cl == 1 are used.
# collect() pulls the summarised result into memory.
# NOTE(review): if open_dataset() here is arrow's, median() inside summarise()
# is computed approximately -- confirm that is acceptable for med_worth.
fl = open_dataset('data/final_long') %>%
  filter(!is.na(GEOID), cycle %in% c(2012, 2016, 2020), in_l2 == 1, in_cl == 1) %>%
  group_by(year = cycle, GEOID) %>%
  summarise(
    n = n(),
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    med_worth = median(total, na.rm = TRUE),
    mean_worth = mean(total, na.rm = TRUE)
    ) %>%
  collect()

# Inner-join the per-tract worth summaries (fl) to the ACS income estimates (tr)
# on tract and year.
# fl comes from collect() and tr from bind_rows(), so merge() returns a plain
# data.frame. Everything below uses data.table syntax (column names evaluated
# inside `[`, `:=`, grouped j), so convert explicitly instead of relying on the
# inputs already being data.tables.
z = data.table::as.data.table(
  merge(fl, tr, by = c('GEOID', 'year'), all = FALSE)
)

# Keep tract-years backed by at least 50 rows (n is the per-group row count).
z = z[n > 49]

# 10th / 90th percentiles of tract income and median worth across all retained
# tract-years. These are not referenced again in the visible code and are not
# part of the saved object.
mhhi10_90 = quantile(z$med_hhi, c(0.1, 0.9), na.rm = TRUE)
mw10_90 = quantile(z$med_worth, c(0.1, 0.9), na.rm = TRUE)

# Bin med_hhi within each year. Breaks are that year's deciles up to the 90th
# percentile, then the 95th, 99th and 99.5th; labels = FALSE yields integer bin
# codes.
# The top break is the 99.5th percentile, so tracts above it fall outside every
# interval and get NA. NOTE(review): presumably a deliberate trim of the top
# 0.5% -- confirm.
z[, med_hhi_quant := cut(
    med_hhi,
    breaks = quantile(
      med_hhi,
      probs = c(seq(0, 0.9, 0.1), 0.95, 0.99, 0.995),
      na.rm = TRUE
    ),
    include.lowest = TRUE,
    labels = FALSE
  ),
  by = year
  ]

# Drop rows without a bin: missing med_hhi, or med_hhi above the top break.
z = z[!is.na(med_hhi_quant)]

save(z, file = 'summary_data/figA4.rda')
